## Loading required package: xml2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Download `url` and hand back its content in one of two shapes.
#
# url:  address of the resource to fetch.
# node: selector forwarded to html_nodes(); the special value '.json'
#       means "treat the resource as JSON" instead of HTML.
#
# Returns the result of fromJSON(url) when node is '.json', otherwise the
# nodes of the parsed HTML page that match `node`.
rawdata <- function(url,node) {
  wants_json <- node == '.json'
  if (!wants_json) {
    # HTML path: parse the page, then select the requested nodes.
    page <- read_html(url)
    return(html_nodes(page, node))
  }
  # JSON path: the selector is only a sentinel, nothing is selected.
  fromJSON(url)
}
# pull deck data from tcgplayer
# =============================
# Selectors for the two table columns that are scraped from every results page.
archetype_node <- 'td:nth-child(2) a'
date_node <- 'td:nth-child(6)'
# Number of search-result pages to walk through.
page_count <- 200
# One data frame per page is stored here and bound together once after the
# loop, instead of re-copying a growing `archetypes` with rbind() each pass.
page_frames <- vector('list', page_count)
for (i in seq_len(page_count)) {
  url <- str_c('http://decks.tcgplayer.com/magic/deck/search?page=',i,'&format=standard&startdate=1-1-2012&p1=true&p2=true&p3t4=true&p5t8=true&p9t16=true&p17t32=true&p33t64=true')
  # Download and parse the page a single time. Calling rawdata() once per
  # column would request the same URL twice, and the two columns could then
  # come from different snapshots of the listing.
  webpage <- read_html(url)
  # Keep only the text between the first '>' and the following '<'.
  archetype <- html_nodes(webpage, archetype_node) %>%
    str_extract('>(.*?)<') %>%
    str_replace('>', '') %>%
    str_replace('<', '')
  # Keep only the text between the first newline and the following
  # carriage return, then trim surrounding whitespace.
  date <- html_nodes(webpage, date_node) %>%
    str_extract('\\n(.*?)\\r') %>%
    str_replace('\\n', '') %>%
    str_replace('\\r', '') %>%
    str_trim("both")
  page_frames[[i]] <- data.frame(archetype, date, stringsAsFactors = FALSE)
}
# Seed with an empty, correctly named frame so the result keeps its two
# columns even if no page produced any rows.
archetypes <- rbind(
  data.frame(archetype = character(0), date = character(0), stringsAsFactors = FALSE),
  do.call(rbind, page_frames)
)
write.csv(archetypes, file = 'data/archetypes.csv')
# pull set release dates from scryfall
# ====================================
scryfallapi <- 'https://api.scryfall.com/sets'
scryfallsets <- 'https://scryfall.com/sets'
ccountnode <- 'td:nth-child(4) a'
scodenode <- 'small'
# Set metadata as JSON; the per-set records are used below as mtgsets$data.
mtgsets <- rawdata(scryfallapi,'.json')
# Parse the HTML listing once and read both columns from the same document
# rather than downloading the page once per column.
setspage <- read_html(scryfallsets)
setcounts <- data.frame(
  # Set code: strip the surrounding <small> tags.
  setcd = html_nodes(setspage, scodenode) %>%
    str_replace('<small>','') %>%
    str_replace('</small>',''),
  # Card count: the text inside the anchor of the fourth column.
  cardct = html_nodes(setspage, ccountnode) %>%
    str_extract('">(.*?)</a>') %>%
    str_replace('">','') %>%
    str_replace('</a>',''),
  stringsAsFactors = FALSE
)
# Upper-case the API codes before joining them to the scraped codes.
mtgsets$data$code <- toupper(mtgsets$data$code)
# merge() keeps only the codes present on both sides by default.
mtgsets <- merge( x = mtgsets$data , y = setcounts , by.x = 'code' , by.y = 'setcd' )
# Keep the date part of released_at. Dropping everything from the first 'T'
# gives the same result as extract-then-strip for timestamp values, and it
# leaves a plain 'YYYY-MM-DD' value untouched, where str_extract('(.*?)T')
# would have produced NA and emptied the output.
mtgsets$released_at <- str_replace(mtgsets$released_at, 'T.*$', '')
# "Big" sets: expansions whose block name equals the set name.
bigsets <- mtgsets %>%
  filter(toupper(block) == toupper(name) & set_type == 'expansion') %>%
  arrange(as.Date(released_at))
# "Small" sets: the remaining expansions, plus every core set.
smallsets <- mtgsets %>%
  filter( (toupper(block) != toupper(name) & set_type == 'expansion') | (set_type == 'core') ) %>%
  arrange(as.Date(released_at))
# rep() keeps data.frame() from failing on a length mismatch when a filter
# above returns zero rows.
sm <- data.frame( scode = smallsets$code , sdate = smallsets$released_at , ssize = rep('sm', nrow(smallsets)), stringsAsFactors = FALSE)
lg <- data.frame( scode = bigsets$code , sdate = bigsets$released_at , ssize = rep('lg', nrow(bigsets)), stringsAsFactors = FALSE)
# Compare Date with Date instead of relying on implicit coercion of a string.
setoutput <- rbind(sm,lg) %>%
  filter(as.Date(sdate) >= as.Date('2011-01-01')) %>%
  arrange(as.Date(sdate))
write.csv(setoutput, file = "data/sets.csv", quote = FALSE)
# load scraped data from csv
# ==========================
# Deck list with the scraped month/day/year string parsed into a Date column.
archcsv <- read.csv('data/archetypes.csv', stringsAsFactors = FALSE) %>%
  mutate( deckdate = as.Date(date, '%m/%d/%Y'))
# Archetypes that occur more than 75 times. count() groups by its argument
# itself, so no prior group_by() is needed and the result is left ungrouped.
prune <- archcsv %>%
  count(archetype) %>%
  filter(n > 75)
# Keep only the decks whose archetype survived the pruning above.
graphdata <- merge( x = archcsv , y = prune , by = "archetype") %>%
  select(archetype,deckdate)
# Read the set list once; dates are converted to their numeric form so they
# can be used as xintercept values on the date axis.
setscsv <- read.csv('data/sets.csv', stringsAsFactors = FALSE) %>%
  mutate(sdate = as.numeric(as.Date(sdate)))
bigdates <- setscsv %>%
  filter(ssize == 'lg') %>%
  select(sdate)
smalldates <- setscsv %>%
  filter(ssize == 'sm') %>%
  select(sdate)
# Filled density of deck counts over time, one band per archetype, with a
# vertical line at each set release: solid black for 'lg' sets, dotted gray
# for 'sm' sets.
# fig.width / fig.height are not ggplot() parameters (they look like chunk
# options); they were swallowed by `...` and had no effect, so they are
# dropped here. The widget size is set in ggplotly() below.
p <- ggplot(graphdata, aes( x = deckdate , y = ..count.. , fill = archetype)) +
  geom_density(position = "fill") +
  theme(legend.position = "none") +
  ylab('Archetype Prevalence') +
  xlab('Date') +
  geom_vline(xintercept = unlist(bigdates), linetype = "solid", color = "black") +
  geom_vline(xintercept = unlist(smalldates), linetype = "dotted", color = "gray")
# Width and height go to ggplotly() because passing them to layout() is
# deprecated; layout() keeps only the autosize switch.
metagraph <- ggplotly(p, width = 750, height = 500) %>%
  layout(autosize = FALSE)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
metagraph